# Clean SSI Sample Data
# Created: 01.03.2019

# Start from a clean workspace: drop every visible object in the current
# environment before anything is loaded.
rm(list = ls())

# NOTE(review): the path string looks like a placeholder to be replaced with
# the real data directory before running -- confirm. The relative file name
# passed to read.csv() below is resolved against this directory.
setwd("PATH TO DATA FOLDER")

# Load the responses into `x`. as.is = TRUE keeps character columns as
# character vectors instead of converting them to factors.
x <- read.csv("AH_POBE_SSI_GenPop.csv", as.is = TRUE)

# Categorize types of screen outs
# Every flag built here is numeric 0/1 and never NA: each condition is made
# only of is.na() tests and %in%, neither of which can return NA.

table(x$mob1, useNA = "always")
table(x$mob2, useNA = "always")

# scrout_mob: mob1 has any recorded value, or mob2 equals 2
x$scrout_mob <- as.numeric(!is.na(x$mob1) | x$mob2 %in% 2)
table(x$scrout_mob)

table(x$c2, useNA = "always")

# scrout_consent: c2 has any recorded value
x$scrout_consent <- as.numeric(!is.na(x$c2))
table(x$scrout_consent)

table(x$c5, useNA = "always")

# scrout_col18: c5 has any recorded value
x$scrout_col18 <- as.numeric(!is.na(x$c5))
table(x$scrout_col18)

table(x$c6, useNA = "always")

# scrout_java: c6 has any recorded value
x$scrout_java <- as.numeric(!is.na(x$c6))
table(x$scrout_java)

table(x$n_w_4, useNA = "always")
table(x$j_w_4, useNA = "always")
table(x$hm_k, useNA = "always")
table(x$ui_k, useNA = "always")
table(x$vh_js, useNA = "always")

# scrout_speed: n_w_4 and j_w_4 are both recorded, at least one of
# hm_k / ui_k is recorded, and vh_js is missing
x$scrout_speed <- as.numeric(
  !(is.na(x$n_w_4) | is.na(x$j_w_4)) &
    !(is.na(x$hm_k) & is.na(x$ui_k)) &
    is.na(x$vh_js)
)
table(x$scrout_speed)

table(x$hm_use, useNA = "always")
table(x$ui_use, useNA = "always")

# complete: at least one of hm_use / ui_use is recorded
x$complete <- as.numeric(!(is.na(x$hm_use) & is.na(x$ui_use)))
table(x$complete, useNA = "always")

# Create additional treatment assignment indicators
# %in% returns FALSE (never NA) for missing values, so rows whose arm code is
# missing or unlisted are simply left as NA in the new columns.

# For HMID policy arm
# hm_treat_v groups the arm codes hm_dr/hm_dp vs hm_ir/hm_ip;
# hm_treat_d groups the arm codes hm_dr/hm_ir vs hm_dp/hm_ip.

x$hm_treat_v <- NA
x$hm_treat_v[x$hm_treat %in% c("hm_dr", "hm_dp")] <- "direct"
table(x$hm_treat, x$hm_treat_v, useNA = "always")

x$hm_treat_v[x$hm_treat %in% c("hm_ir", "hm_ip")] <- "indirect"
table(x$hm_treat, x$hm_treat_v, useNA = "always")

x$hm_treat_d <- NA
x$hm_treat_d[x$hm_treat %in% c("hm_dr", "hm_ir")] <- "reg"
table(x$hm_treat, x$hm_treat_d, useNA = "always")

x$hm_treat_d[x$hm_treat %in% c("hm_dp", "hm_ip")] <- "pro"
table(x$hm_treat, x$hm_treat_d, useNA = "always")

# For UI policy arm
# Only two arm codes are recoded: ui_r -> "reg", ui_p -> "pro".

x$ui_treat_d <- NA
x$ui_treat_d[x$ui_treat %in% "ui_r"] <- "reg"
x$ui_treat_d[x$ui_treat %in% "ui_p"] <- "pro"
table(x$ui_treat, x$ui_treat_d, useNA = "always")

# For Nutrition Program
# resframe "d" -> "direct", "i" -> "indirect"; any other or missing value
# leaves the indicator NA (%in% is FALSE for NA, so those rows are skipped).

table(x$resframe, useNA = "always")
x$n_treat_v <- NA
x$n_treat_v[x$resframe %in% "d"] <- "direct"
x$n_treat_v[x$resframe %in% "i"] <- "indirect"
table(x$resframe, x$n_treat_v, useNA = "always")

# For Job Training Program (by design, the opposite of the nutrition framing)
# Same resframe codes, mapped the other way round.

x$j_treat_v <- NA
x$j_treat_v[x$resframe %in% "d"] <- "indirect"
x$j_treat_v[x$resframe %in% "i"] <- "direct"
table(x$resframe, x$j_treat_v, useNA = "always")

# Create combined variable for Nutrition/Job Training primary outcome
# Each respondent's answer is taken from the column that matches their
# framing: the *_d column under "direct", the *_i column under "indirect".
# Rows with no framing, or with a missing answer in the matching column,
# end up NA.

table(x$n_d, useNA = "always")
table(x$n_i, useNA = "always")

x$n_r <- ifelse(
  x$n_treat_v %in% "direct",
  x$n_d,
  ifelse(x$n_treat_v %in% "indirect", x$n_i, NA)
)

table(x$n_r, useNA = "always")
table(x$n_r, x$n_d, useNA = "always")
table(x$n_r, x$n_i, useNA = "always")

x$j_r <- ifelse(
  x$j_treat_v %in% "direct",
  x$j_d,
  ifelse(x$j_treat_v %in% "indirect", x$j_i, NA)
)

table(x$j_r, useNA = "always")
table(x$j_r, x$j_d, useNA = "always")
table(x$j_r, x$j_i, useNA = "always")

# Recode post-survey measurement of pre-treatment covariates
# Comparisons against NA yield NA, so a missing source answer carries through
# to a missing recode without any explicit guard.

table(x$ps0, useNA = "always")

# college: 1 when ps0 is 5 or above, 0 when below 5
x$college <- as.numeric(x$ps0 >= 5)
table(x$ps0, x$college, useNA = "always")

table(x$ps2.1, useNA = "always")

# black: 1 when ps2.1 equals 2, 0 for any other recorded value
x$black <- as.numeric(x$ps2.1 == 2)

table(x$ps2.1, x$black, useNA = "always")

# latino: 1 when ps2.1 equals 3, 0 for any other recorded value
x$latino <- as.numeric(x$ps2.1 == 3)

table(x$ps2.1, x$latino, useNA = "always")

table(x$ps3.1, useNA = "always")
# NOTE(review): presumably ps3.1 is a birth-year code offset from 1919 and
# 2017 is the survey year -- confirm against the questionnaire.
x$age <- 2017 - (1919 + x$ps3.1)
head(x[!is.na(x$ps3.1), c("ps3.1", "age")])

table(x$ps4, useNA = "always")

# female: ps4 == 1 -> 0, ps4 == 2 -> 1, anything else (or missing) -> NA
x$female <- c(0, 1)[match(x$ps4, c(1, 2))]
table(x$ps4, x$female, useNA = "always")

# Partisanship
# Three 0/1 dummies from ps5 (1 -> dem, 2 -> rep, 3 -> ind); a missing ps5
# gives NA in all three because comparing NA returns NA.

table(x$ps5, useNA = "always")

x$dem <- as.numeric(x$ps5 == 1)

table(x$ps5, x$dem, useNA = "always")

x$rep <- as.numeric(x$ps5 == 2)

table(x$ps5, x$rep, useNA = "always")

x$ind <- as.numeric(x$ps5 == 3)

table(x$ps5, x$ind, useNA = "always")

# Independent Leaners
# *_plus_lean is 1 for the party's own identifiers plus independents whose
# ps6 points to that party (1 for dem, 2 for rep). It is 0 for ps5 in the
# other listed categories and for independents with any other listed ps6
# value. Independents with ps6 missing, and rows with ps5 missing, stay NA.
# %in% is FALSE for NA, so no separate is.na() guards are needed.

table(x$ps6, useNA = "always")

x$dem_plus_lean <- NA
x$dem_plus_lean[x$dem %in% 1 | (x$ind %in% 1 & x$ps6 %in% 1)] <- 1
x$dem_plus_lean[x$ps5 %in% c(2, 4, 5) |
                  (x$ind %in% 1 & x$ps6 %in% c(2, 3))] <- 0

table(x$dem, x$dem_plus_lean, useNA = "always")
table(x$ind, x$dem_plus_lean, useNA = "always")
table(x$ps5, x$dem_plus_lean, useNA = "always")
table(x$ps6, x$dem_plus_lean, useNA = "always")

x$rep_plus_lean <- NA
x$rep_plus_lean[x$rep %in% 1 | (x$ind %in% 1 & x$ps6 %in% 2)] <- 1
x$rep_plus_lean[x$ps5 %in% c(1, 4, 5) |
                  (x$ind %in% 1 & x$ps6 %in% c(1, 3))] <- 0

table(x$rep, x$rep_plus_lean, useNA = "always")
table(x$ind, x$rep_plus_lean, useNA = "always")
table(x$ps5, x$rep_plus_lean, useNA = "always")
table(x$ps6, x$rep_plus_lean, useNA = "always")

# Recode Household Income
# Look up a numeric value for each ps3 category code 1-6; match() returns NA
# for a missing or unlisted code, so those rows get NA.
# NOTE(review): units are not shown here -- presumably thousands of dollars
# at bracket midpoints; confirm against the questionnaire.

table(x$ps3, useNA = "always")
x$hhinc <- c(10, 30, 57.5, 87.5, 150, 200)[match(x$ps3, 1:6)]

# Create total diverted from program goal variable
# Element-wise sum of the *_w_2, *_w_3 and *_w_4 columns for each policy arm;
# a row is NA if any of its three components is NA.

x$hm_divert <- Reduce(`+`, x[c("hm_w_2", "hm_w_3", "hm_w_4")])
x$ui_divert <- Reduce(`+`, x[c("ui_w_2", "ui_w_3", "ui_w_4")])

# Subset to complete responses
# Keep only the rows flagged complete == 1; all columns are retained.

y <- x[x$complete %in% 1, , drop = FALSE]
